## Functions

## For "kickout_experiment.R"
## The Sainte-Lague PR method calculator
saintelague <- function (lvote, m) {
  lvote <- cbind(lvote, lvote / 3)
  lvote_rank <- t(apply(lvote, 1, order, decreasing = TRUE))
  topm_1 <- apply(lvote_rank[, 1:m], 1, function (x) {1 %in% x})
  topm_2 <- apply(lvote_rank[, 1:m], 1, function (x) {2 %in% x})
  topm_3 <- apply(lvote_rank[, 1:m], 1, function (x) {3 %in% x})
  topm_4 <- apply(lvote_rank[, 1:m], 1, function (x) {4 %in% x})
  topm_5 <- apply(lvote_rank[, 1:m], 1, function (x) {5 %in% x})
  runnerup0 <- c(lvote_rank[, m + 1])
  runnerup <- ifelse(runnerup0 > 5, runnerup0 - 5, runnerup0)
  rankfirst <- c(lvote_rank[, 1])
  ranksecond0 <- c(lvote_rank[, 2])
  ranksecond <- ifelse(ranksecond0 > 5, ranksecond0 - 5, ranksecond0)
  rankthird0 <- c(lvote_rank[, 3])
  rankthird <- ifelse(rankthird0 > 5, rankthird0 - 5, rankthird0)
  n_topmplus1 <- apply(lvote_rank[, 1:(m + 1)], 1, function (x) {sum(c(1:5) %in% x)})
  n_topmplus1[is.na(lvote[, 1]) == 1] <- NA
  notrunuptopm_1 <- topm_1 * (runnerup != 1)
  notrunuptopm_2 <- topm_2 * (runnerup != 2)
  notrunuptopm_3 <- topm_3 * (runnerup != 3)
  notrunuptopm_4 <- topm_4 * (runnerup != 4)
  notrunuptopm_5 <- topm_5 * (runnerup != 5)
  lastwin0 <- c(lvote_rank[, m])
  lastwin <- ifelse(lastwin0 > 5, lastwin0 - 5, lastwin0)
  nondiscriminate <- numeric(nrow(lvote))
  for (i in 1:nrow(lvote)) {
    nondiscriminate[i] <- (lvote[i, lastwin[i]] == lvote[i, runnerup[i]])
  }
  data.frame(topm_1, topm_2, topm_3, topm_4, topm_5, 
             runnerup, rankfirst, ranksecond, rankthird, n_topmplus1,
             notrunuptopm_1, notrunuptopm_2, notrunuptopm_3,
             notrunuptopm_4, notrunuptopm_5,
             nondiscriminate)
}

## Preference calculator
preftop <- function (data) {
  m <- data$M[1]
  pref <- data %>% dplyr::select(pref_1, pref_2, pref_3, pref_4, pref_5)
  topm <- data %>% dplyr::select(topm_1, topm_2, topm_3, topm_4, topm_5)
  pref_topm <- pref * topm
  runnerupmatrix <- diag(5)[data$runnerup, ]
  topmplus1matrix <- topm + runnerupmatrix
  pref_topmplus1 <- pref * topmplus1matrix
  pref_topmplus1_first <- apply(pref_topmplus1, 1, sort)[5, ]
  pref_topmplus1_second <- apply(pref_topmplus1, 1, sort)[4, ]
  pref_topmplus1_third <- apply(pref_topmplus1, 1, sort)[3, ]
  pref_topmplus1_fourth <- apply(pref_topmplus1, 1, sort)[2, ]
  pref_runnerup <- apply(pref * runnerupmatrix, 1, sum)
  topmplus1_first <- apply(pref_topmplus1, 1, order)[5, ]
  topmplus1_second <- apply(pref_topmplus1, 1, order)[4, ]
  notrunuptopm <- data %>% 
                  dplyr::select(notrunuptopm_1, notrunuptopm_2, notrunuptopm_3,
                                notrunuptopm_4, notrunuptopm_5)
  pref_notrunuptopm <- pref * notrunuptopm
  pref_notrunuptopm_first <- apply(pref_notrunuptopm, 1, sort)[5, ]
  pref_notrunuptopm_second <- apply(pref_notrunuptopm, 1, sort)[4, ]
  pref_notrunuptopm_third <- apply(pref_notrunuptopm, 1, sort)[3, ]
  notrunuptopm_first <- apply(pref_notrunuptopm, 1, order)[5, ]
  notrunuptopm_second <- apply(pref_notrunuptopm, 1, order)[4, ]
  data.frame(pref_topmplus1_first, pref_topmplus1_second, 
             pref_topmplus1_third, pref_topmplus1_fourth, 
             pref_runnerup,
             topmplus1_first, topmplus1_second,
             pref_notrunuptopm_first, pref_notrunuptopm_second, 
             pref_notrunuptopm_third,
             notrunuptopm_first, notrunuptopm_second)
}

## glm() with cluster-robust standard errors
glm_cluster <- function(data, formula, cluster, family) {
  res <- stats::glm(data = data, formula = formula, family = family)
  cluster_var <- data[, cluster, drop = TRUE]
  vcov2 <- sandwich::vcovCL(x = res, cluster = data.frame(cluster_var))
  res <- list(coefficients = res$coefficients, vcov = vcov2)
  res
}

## Number of observations and clusters (assuming no missing values)
nobsnclus <- function (data) {
  cat("Number of observations: ")
  print(nrow(data))
  cat("Number of clusters: ")
  print(length(unique(data$cluster)))
  invisible(c(nobs = nrow(data), ncluster = length(unique(data$cluster))))
}

## Summarise the results and arrange them for LaTeX tables
summary_cluster_a <- function (data, formula, type) {
  result <- glm_cluster(data = data, 
                        formula = formula, 
                        cluster = "cluster", 
                        family = binomial(probit))
  resnobs <- paste0(nobsnclus(data))
  rescoef <- as.character(round(result$coefficients * 100, 3))
  resse <- as.character(round(sqrt(diag(result$vcov)) * 100, 3))
  rescoef[str_length(str_remove(rescoef, "-")) == 4] <- 
      paste0(rescoef[str_length(str_remove(rescoef, "-")) == 4], "0")
  rescoef[str_length(str_remove(rescoef, "-")) == 3] <- 
      paste0(rescoef[str_length(str_remove(rescoef, "-")) == 3], "00")
  resse[str_length(resse) == 4] <- paste0(resse[str_length(resse) == 4], "0")
  if (type == "a") {
    res <- c(rescoef[2:4], paste0("(", resse[2:4], ")"))[c(1, 4, 2, 5, 3, 6)]
    res[5:6] <- paste0("\\textbf{", res[5:6], "}")
  } else if (type == "b2") {
    res <- c(rescoef[2:5], paste0("(", resse[2:5], ")"))[c(1, 5, 2, 6, 3, 7, 4, 8)]
    res[5:8] <- paste0("\\textbf{", res[5:8], "}")
  } else {
    res <- c(rescoef[2:5], paste0("(", resse[2:5], ")"))[c(1, 5, 2, 6, 3, 7, 4, 8)]
    res[7:8] <- paste0("\\textbf{", res[7:8], "}")
  }
  res <- c(res, resnobs)
  res
}

## Average marginal effect estimator for results estimated by glm_cluster() function
ame_cluster <- function (object, sim_data, treat, B = 10000, xlab, ylab, ylab2, y2scale, histobreaks) {
  estdata <- data.frame(sim_data)
  L <- length(treat)
  beta <- mvrnorm(B, mu = object$coefficients, Sigma = object$vcov)
  boot <- numeric(B)
  boot2 <- matrix(, nrow = L, ncol = 3)
  for(i in 1:L){
    sim_data[, 2] <- treat[i]
    for(j in 1:B){
      boot[j] <- mean(pnorm(sim_data %*% beta[j, ]))
    }
    boot2[i, ] <- c(mean(boot), 
                    sort(boot)[c(0.025 * nrow(beta), (0.975 * nrow(beta) + 1))])
  }
  boot2 <- data.frame(treat, boot2)
  colnames(boot2) <- c("x", "pred", "lower", "upper")
  ymax <- ceiling(max(boot2$upper) * 20) / 20
  colnames(estdata)[2] <- "treat"
  maxcount <- max(hist(estdata$treat, breaks = histobreaks)$counts)
  figure <- ggplot(estdata, aes(x = treat, y = stat(count) / maxcount / y2scale * ymax)) +
            geom_histogram(breaks = histobreaks) +
            geom_hline(yintercept = 0) +
            geom_line(data = boot2, aes(x = x, y = pred)) +
            geom_line(data = boot2, aes(x = x, y = upper), linetype = "dashed") +
            geom_line(data = boot2, aes(x = x, y = lower), linetype = "dashed") +
            theme(panel.grid.major.x = element_blank(), 
                  panel.grid.minor.x = element_blank(),
                  panel.grid.major.y = element_blank(), 
                  panel.grid.minor.y = element_blank()) +
            xlab(xlab) +
            scale_y_continuous(name = ylab, 
                               limits = c(NA, ymax),
                               sec.axis = sec_axis(trans = ~ . * maxcount * y2scale / ymax, name = ylab2)) +
            theme_classic() +
            theme(axis.title.x = element_text(size = 21), 
                  axis.title.y = element_text(size = 21),
                  axis.text.x = element_text(size = 20),
                  axis.text.y = element_text(size = 20),
                  legend.title = element_text(size = 24),
                  legend.text = element_text(size = 24),
                  plot.title = element_text(hjust = 0.5, size = 26))
  list(result = boot2, figure = figure)
}

## Average effect estimator where the treatment variable shifts from one value to another
## for the results estimated by glm_cluster() function
amedif_cluster <- function (object, sim_data, treat, B = 10000) {
  if (length(treat) != 2) {
    stop("length of treat must be 2")
  }
  beta <- mvrnorm(B, mu = object$coefficients, Sigma = object$vcov)
  boot <- numeric(B)
  boot2 <- matrix(, nrow = 2, ncol = 3)
  for(i in 1:2){
    sim_data[, 2] <- treat[i]
    for(j in 1:B){
      boot[j] <- mean(pnorm(sim_data %*% beta[j, ]))
    }
    boot2[i, ] <- c(mean(boot), 
                    sort(boot)[c(0.025 * nrow(beta), (0.975 * nrow(beta) + 1))])
    if (i == 1) {
      boot0 <- boot
    }
  }
  boot3 <- data.frame(dif = mean(boot0 - boot),
                      lower = sort(boot0 - boot)[0.025 * nrow(beta)],
                      upper = sort(boot0 - boot)[0.975 * nrow(beta) + 1])
  boot2 <- data.frame(treat, boot2)
  colnames(boot2) <- c("x", "pred", "lower", "upper")
  list(result = boot2, dif = boot3)
}


## For "kickout_romania.R" and "kickout_japan.R"
## Average marginal effect estimator
ame <- function (object, treat, B = 10000, xlab, ylab, ylab2, y2scale, histobreaks) {
  L <- length(treat)
  summaryres <- summary(object)
  beta <- mvrnorm(B, mu = object$coefficients, Sigma = summaryres$cov.scaled)
  sim_data <- object$x
  boot <- numeric(B)
  boot2 <- matrix(, nrow = L, ncol = 3)
  for(i in 1:L){
    sim_data[, 2] <- treat[i]
    for(j in 1:B){
      boot[j] <- mean(pnorm(sim_data %*% beta[j, ]))
    }
    boot2[i, ] <- c(mean(boot), 
                    sort(boot)[c(0.025 * nrow(beta), (0.975 * nrow(beta) + 1))])
  }
  boot2 <- data.frame(treat, boot2)
  colnames(boot2) <- c("x", "pred", "lower", "upper")
  ymax <- ceiling(max(boot2$upper) * 20) / 20
  estdata <- data.frame(object$x)
  colnames(estdata)[2] <- "treat"
  maxcount <- max(hist(estdata$treat, breaks = histobreaks)$counts)
  figure <- ggplot(estdata, aes(x = treat, y = stat(count) / maxcount / y2scale * ymax)) +
            geom_histogram(breaks = histobreaks) +
            geom_hline(yintercept = 0) +
            geom_line(data = boot2, aes(x = x, y = pred)) +
            geom_line(data = boot2, aes(x = x, y = upper), linetype = "dashed") +
            geom_line(data = boot2, aes(x = x, y = lower), linetype = "dashed") +
            theme(panel.grid.major.x = element_blank(), 
                  panel.grid.minor.x = element_blank(),
                  panel.grid.major.y = element_blank(), 
                  panel.grid.minor.y = element_blank()) +
            xlab(xlab) +
            scale_y_continuous(name = ylab, 
                               limits = c(NA, ymax),
                               sec.axis = sec_axis(trans = ~ . * maxcount * y2scale / ymax, name = ylab2)) +
            theme_classic() +
            theme(axis.title.x = element_text(size = 21), 
                  axis.title.y = element_text(size = 21),
                  axis.text.x = element_text(size = 20),
                  axis.text.y = element_text(size = 20),
                  legend.title = element_text(size = 24),
                  legend.text = element_text(size = 24),
                  plot.title = element_text(hjust = 0.5, size = 26))
  list(result = boot2, figure = figure)
}

## Average effect estimator where the treatment variable shifts from one value to another
amedif <- function (object, treat, B = 10000) {
  if (length(treat) != 2) {
    stop("length of treat must be 2")
  }
  summaryres <- summary(object)
  beta <- mvrnorm(B, mu = object$coefficients, Sigma = summaryres$cov.scaled)
  sim_data <- object$x
  boot <- numeric(B)
  boot2 <- matrix(, nrow = 2, ncol = 3)
  for(i in 1:2){
    sim_data[, 2] <- treat[i]
    for(j in 1:B){
      boot[j] <- mean(pnorm(sim_data %*% beta[j, ]))
    }
    boot2[i, ] <- c(mean(boot), 
                    sort(boot)[c(0.025 * nrow(beta), (0.975 * nrow(beta) + 1))])
    if (i == 1) {
      boot0 <- boot
    }
  }
  boot3 <- data.frame(dif = mean(boot0 - boot),
                      lower = sort(boot0 - boot)[0.025 * nrow(beta)],
                      upper = sort(boot0 - boot)[0.975 * nrow(beta) + 1])
  boot2 <- data.frame(treat, boot2)
  colnames(boot2) <- c("x", "pred", "lower", "upper")
  list(result = boot2, dif = boot3)
}


## For "kickout_romania.R"
## the (simple) Hare method
Hare <- function (magnitude, share) {
  share <- share / sum(share)
  quota <- 1 / magnitude
  seat <- floor(share / quota)
  reminder <- share / quota - seat
  r_seat <- magnitude - sum(seat)
  addseat <- order(reminder, decreasing = TRUE)[1:r_seat]
  addseatvec <- rep(0, length(share))
  addseatvec[addseat] <- 1
  seat <- seat + addseatvec
  runup <- c("A", "B", "D", "E")[order(reminder, decreasing = TRUE)[r_seat + 1]]
  lastwin <- c("A", "B", "D", "E")[order(reminder, decreasing = TRUE)[r_seat]]
  threshold <- sort(reminder, decreasing = TRUE)[r_seat]
  reminder <- reminder - threshold
  reminder <- reminder + (reminder < 0)
  reminder[seat == 0] <- 0
  rankfirst <- c("A", "B", "D", "E")[order(reminder, decreasing = TRUE)[1]]
  ranksecond <- c("A", "B", "D", "E")[order(reminder, decreasing = TRUE)[2]]
  rankthird <- c("A", "B", "D", "E")[order(reminder, decreasing = TRUE)[3]]
  nocomp <- (share > 1 / (5 * magnitude))
  n_comp <- sum(nocomp)
  list(seat = seat,
       runup = runup,
       lastwin = lastwin,
       rankfirst = rankfirst,
       ranksecond = ranksecond,
       rankthird = rankthird,
       nocomp = nocomp,
       n_comp = n_comp)
}

## Calculate seat alocation in the first step
Hare2 <- function (magnitude, share, totalvote) {
  sumshare <- sum(share)
  share <- share / sumshare
  quota <- 1 / magnitude
  seat <- floor(share / quota)
  reminder <- (share / quota - seat) * totalvote * sumshare
  r_seat <- magnitude - sum(seat)
  list(seat = seat,
       reminder = reminder,
       r_seat = r_seat)
}

## Seat allocation under the simple Hare method
PRseatROU2004 <- function(data){
  distdata <- data %>%
              group_by(dist) %>%
              summarise(magnitude = mean(magnitude),
                        shareA = mean(shareA),
                        shareB = mean(shareB),
                        shareD = mean(shareD),
                        shareE = mean(shareE)) %>%
              ungroup() %>%
              data.frame()
  seat <- matrix(, nrow = nrow(distdata), ncol = 4)
  runup <- numeric(nrow(distdata))
  lastwin <- numeric(nrow(distdata))
  rankfirst <- numeric(nrow(distdata))
  ranksecond <- numeric(nrow(distdata))
  rankthird <- numeric(nrow(distdata))
  nocomp <- matrix(, nrow = nrow(distdata), ncol = 4)
  n_comp <- numeric(nrow(distdata))
  for (i in 1:nrow(distdata)) {
    share <- unlist(distdata[i, c("shareA", "shareB", "shareD", "shareE")])
    resHare <- Hare(magnitude = distdata$magnitude[i],
                    share = share)
    seat[i, ] <- resHare$seat
    runup[i] <- resHare$runup
    lastwin[i] <- resHare$lastwin
    rankfirst[i] <- resHare$rankfirst
    ranksecond[i] <- resHare$ranksecond
    rankthird[i] <- resHare$rankthird
    nocomp[i, ] <- resHare$nocomp
    n_comp[i] <- resHare$n_comp
  }
  result <- data.frame(distdata$dist, seat, runup, lastwin, rankfirst, ranksecond, rankthird, nocomp, n_comp)
  colnames(result) <- c("dist", 
                        "HseatA", "HseatB", "HseatD", "HseatE", 
                        "runup", "lastwin", "rankfirst", "ranksecond", "rankthird", 
                        "nocompA", "nocompB", "nocompD", "nocompE", 
                        "n_comp")
  result
}

max2 <- function (x, na.rm) {
  ifelse(sum(is.na(x)) == length(x), NA, max(x, na.rm = na.rm))
}

min2 <- function (x, na.rm) {
  ifelse(sum(is.na(x)) == length(x), NA, min(x, na.rm = na.rm))
}

mean2 <- function (x, na.rm) {
  ifelse(sum(is.na(x)) == length(x), NA, mean(x, na.rm = na.rm))
}

## Summarise the results and arrange them for LaTeX tables
summary_r <- function (data, formula, cov, type) {
  result <- glm(data = data, 
                formula = formula, 
                family = binomial(probit))
  resnobs <- nobs(result)
  print(summary(result))
  cat("\nNumber of observations\n")
  print(resnobs)
  result <- summary(result)$coefficients
  result <- round(result * 10, 3)
  rescoef <- as.character(result[, 1])
  resse <- as.character(result[, 2])
  rescoef[str_length(str_remove(rescoef, "-")) == 4] <- 
    paste0(rescoef[str_length(str_remove(rescoef, "-")) == 4], "0")
  resse[str_length(resse) == 3] <- paste0(resse[str_length(resse) == 3], "00")
  resse[str_length(resse) == 4] <- paste0(resse[str_length(resse) == 4], "0")
  if (type == "a") {
    res <- c(rescoef[2:4], paste0("(", resse[2:4], ")"))[c(1, 4, 2, 5, 3, 6)]
    res[5:6] <- paste0("\\textbf{", res[5:6], "}")
  } else {
    res <- c(rescoef[2:5], paste0("(", resse[2:5], ")"))[c(1, 5, 2, 6, 3, 7, 4, 8)]
    res[7:8] <- paste0("\\textbf{", res[7:8], "}")
  }
  cov <- ifelse(cov == TRUE, "$\\checkmark$", NA)
  res <- c(res, "$\\checkmark$", cov, resnobs)
  res
}

## For "kickout_japan.R"
## Summarise the results and arrange them for LaTeX tables
summary_j <- function (data, formula, type, fe) {
  result <- glm(data = data, 
                formula = formula, 
                family = binomial(probit))
  resnobs <- nobs(result)
  print(summary(result))
  cat("\nNumber of observations\n")
  print(resnobs)
  result <- summary(result)$coefficients
  result <- round(result * 100, 3)
  rescoef <- as.character(result[, 1])
  resse <- as.character(result[, 2])
  rescoef[str_length(str_remove(rescoef, "-")) == 4] <- 
    paste0(rescoef[str_length(str_remove(rescoef, "-")) == 4], "0")
  resse[str_length(resse) == 4] <- paste0(resse[str_length(resse) == 4], "0")
  if (type == "c" | type == "pa") {
    res <- c(rescoef[2:5], paste0("(", resse[2:5], ")"))[c(1, 5, 2, 6, 3, 7, 4, 8)]
    res[1:2] <- paste0("\\textbf{", res[1:2], "}")
  } else if (type == "pb") {
    res <- c(rescoef[2:3], paste0("(", resse[2:3], ")"))[c(1, 3, 2, 4)]
    res[1:2] <- paste0("\\textbf{", res[1:2], "}")
  }
  if (fe == 0) {
    res <- c(res, NA, NA)
  } else if (fe == 1) {
    res <- c(res, "$\\checkmark$", NA)
  } else {
    res <- c(res, "$\\checkmark$", "$\\checkmark$")
  }
  res <- c(res, resnobs)
  res
}

